###############################################################################
# Main Analysis I Appendix 2019 Only
###############################################################################
###############################################################################
# Models reported in the Appendix Text, including their Tables
###############################################################################
###############################################################################
# Content
###############################################################################
# 1) Dependencies
# 2) Load Data
# 3) Modeling
# 4) Save Model Output in Tables
###############################################################################
# 1) Dependencies
###############################################################################
library(dplyr)
library(lubridate)
library(texreg)
library(MASS)
library(lme4)
###############################################################################
# 2) Load Data
###############################################################################
# Set Path
# All file paths below are relative ("../data", "../tables_appendix"), so the
# working directory is moved to the folder containing this script.
# The script location is obtained through the RStudio API. That API is only
# usable inside a running RStudio session, and it returns an empty path for an
# unsaved document, so both cases are checked before calling setwd(). When the
# location cannot be determined the working directory is left unchanged and
# the script must be started from its own folder.
script_path <- ""
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
}
if (nzchar(script_path)) {
  setwd(dirname(script_path))
} else {
  message("Script location unavailable; working directory left at: ", getwd())
}
# Remove every object from the global environment (including script_path).
rm(list = ls())

# Variant with articles that mention a person ####


# Reads one aggregated CSV and adds `month` and `day` (taken from its `date`
# column), then rewrites `date` as the same month and day in the placeholder
# year 2021. Presumably this puts observations from different years on one
# shared calendar axis - confirm against the data description.
read_on_placeholder_year <- function(csv_path) {
  raw <- read.csv(csv_path)
  mutate(
    raw,
    month = month(date),
    day = day(date),
    date = as.Date(paste("2021", month, day, sep = "-"))
  )
}

### Overall (M vs F):
d_overall_i <- read_on_placeholder_year(
  "../data/big_newspapers_only_diff_in_diff_m_vs_f_no_split_no_presidents.csv"
)
head(d_overall_i)

### Overall (M vs F vs NO Mention):
d_overall_ii <- read_on_placeholder_year(
  "../data/big_newspapers_only_diff_in_diff_m_vs_f_vs_NA_no_split_absolute_no_presidents.csv"
)
head(d_overall_ii)

### Issues (M vs F):
d_issues_i <- read_on_placeholder_year(
  "../data/big_newspapers_only_diff_in_diff_m_vs_f_topic_split_no_presidents.csv"
)
head(d_issues_i)

### Issues (M vs F vs NO Mention):
d_issues_ii <- read_on_placeholder_year(
  "../data/big_newspapers_only_diff_in_diff_m_vs_f_vs_NA_topic_split_no_presidents.csv"
)
head(d_issues_ii)


###############################################################################
# 3) Modeling
###############################################################################
# --------------------------------------------------------------------------- #
# Data Transformation Overall
# --------------------------------------------------------------------------- #
# Build the modelling data for the overall M-vs-F series in one pipeline:
#  1. Expand to a full daily grid: every day between the first and last
#     observed date, crossed with every observed year and gender. Combinations
#     missing from the input get n, sum_art, freq and perc set to 0.
#  2. Recompute month/day for the new rows; date_2 rebuilds the date with the
#     row's own year, so wday is the weekday of that day in that year.
#  3. Keep only the rows with gender "f".
#  4. Add the difference-in-differences indicators:
#     treated        - 0 for 2015, 1 for 2019, NA for any other year
#     post_treatment - 0 before 14 June, 1 from 14 June on; it depends on the
#                      calendar date only, not on the year
#     week           - week number of the date
d_overall_i <- d_overall_i %>%
  mutate(date = as.Date(date)) %>%
  tidyr::complete(
    date = seq.Date(min(date), max(date), by = "day"),
    year,
    gender,
    fill = list(n = 0, sum_art = 0, freq = 0, perc = 0)
  ) %>%
  mutate(
    month = month(date),
    day = day(date),
    date_2 = as.Date(paste(year, month, day, sep = "-")),
    wday = weekdays(date_2)
  ) %>%
  filter(gender == "f") %>%
  mutate(
    treated = case_when(year == 2015 ~ 0, year == 2019 ~ 1),
    post_treatment = as.numeric(date >= as.Date("2021-06-14")),
    week = week(date)
  )
head(d_overall_i)
# --------------------------------------------------------------------------- #
# Data Transformation Overall (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
# Build the modelling data for the overall M-vs-F-vs-no-mention series:
#  1. Expand to a full daily grid (all days between the first and last
#     observed date x every observed year x every observed gender). Missing
#     combinations get n_2, sum_day, freq and perc set to 0.
#  2. Recompute month/day; date_2 carries the row's own year so that wday is
#     the weekday of that day in that year.
#  3. Keep only the rows with gender "f".
#  4. Add treated (0 = 2015, 1 = 2019, NA otherwise), post_treatment
#     (1 from 14 June on, independent of the year) and the week number.
d_overall_ii <- d_overall_ii %>%
  mutate(date = as.Date(date)) %>%
  tidyr::complete(
    date = seq.Date(min(date), max(date), by = "day"),
    year,
    gender,
    fill = list(n_2 = 0, sum_day = 0, freq = 0, perc = 0)
  ) %>%
  mutate(
    month = month(date),
    day = day(date),
    date_2 = as.Date(paste(year, month, day, sep = "-")),
    wday = weekdays(date_2)
  ) %>%
  filter(gender == "f") %>%
  mutate(
    treated = case_when(year == 2015 ~ 0, year == 2019 ~ 1),
    post_treatment = as.numeric(date >= as.Date("2021-06-14")),
    week = week(date)
  )
head(d_overall_ii)

# --------------------------------------------------------------------------- #
# Data Transformation Issues
# --------------------------------------------------------------------------- #
# Build the modelling data for the issue-level M-vs-F series:
#  1. Expand to a full daily grid (all days between the first and last
#     observed date x year x gender x issue category). Missing combinations
#     get n, sum_art_class, freq and perc set to 0.
#  2. Recompute month/day; date_2 carries the row's own year so that wday is
#     the weekday of that day in that year.
#  3. Keep only the rows with gender "f".
#  4. Add treated (0 = 2015, 1 = 2019, NA otherwise), post_treatment
#     (1 from 14 June on, independent of the year) and the week number.
d_issues_i <- d_issues_i %>%
  mutate(date = as.Date(date)) %>%
  tidyr::complete(
    date = seq.Date(min(date), max(date), by = "day"),
    year,
    gender,
    selectsclass,
    fill = list(n = 0, sum_art_class = 0, freq = 0, perc = 0)
  ) %>%
  mutate(
    month = month(date),
    day = day(date),
    date_2 = as.Date(paste(year, month, day, sep = "-")),
    wday = weekdays(date_2)
  ) %>%
  filter(gender == "f") %>%
  mutate(
    treated = case_when(year == 2015 ~ 0, year == 2019 ~ 1),
    post_treatment = as.numeric(date >= as.Date("2021-06-14")),
    week = week(date)
  )
head(d_issues_i)

# Issue categories present in the data. The listing below is presumably
# console output pasted from an earlier run - verify against the current file.
unique(d_issues_i[["selectsclass"]])
#  [1] "NotPolitical"                       "PoliticalSystem"
#  [3] "PublicServices_Infrastructure"      "SocialSecurity_WelfareState"
#  [5] "Economy"                            "Regions_NationalCohesion"
#  [7] "Agriculture"                        "Education_Culture"
#  [9] "Environment_Energy"                 "EU_Europa"
# [11] "Finances_Taxes"                     "Immigration_Asylum"
# [13] "Law_Order"                          "Other_unclassified_Political_Texts"
# [15] "GenderIssues_Discrimination"        "PublicHealth"
# [17] "LabourMarket"                       "Other_Problems"
# [19] "Not Classified"                     "InternationalRelations"
# --------------------------------------------------------------------------- #
# Data Transformation Issues (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
# Build the modelling data for the issue-level M-vs-F-vs-no-mention series:
#  1. Expand to a full daily grid (all days between the first and last
#     observed date x year x gender x issue category). Missing combinations
#     get n_2, sum_day, freq and perc set to 0.
#  2. Recompute month/day; date_2 carries the row's own year so that wday is
#     the weekday of that day in that year.
#  3. Keep only the rows with gender "f".
#  4. Add treated (0 = 2015, 1 = 2019, NA otherwise), post_treatment
#     (1 from 14 June on, independent of the year) and the week number.
d_issues_ii <- d_issues_ii %>%
  mutate(date = as.Date(date)) %>%
  tidyr::complete(
    date = seq.Date(min(date), max(date), by = "day"),
    year,
    gender,
    selectsclass,
    fill = list(n_2 = 0, sum_day = 0, freq = 0, perc = 0)
  ) %>%
  mutate(
    month = month(date),
    day = day(date),
    date_2 = as.Date(paste(year, month, day, sep = "-")),
    wday = weekdays(date_2)
  ) %>%
  filter(gender == "f") %>%
  mutate(
    treated = case_when(year == 2015 ~ 0, year == 2019 ~ 1),
    post_treatment = as.numeric(date >= as.Date("2021-06-14")),
    week = week(date)
  )
head(d_issues_ii)

# Issue categories present in the data. The listing below is presumably
# console output pasted from an earlier run - verify against the current file.
unique(d_issues_ii[["selectsclass"]])
#  [1] "NotPolitical"                       "PoliticalSystem"
#  [3] "PublicServices_Infrastructure"      "SocialSecurity_WelfareState"
#  [5] "Economy"                            "Regions_NationalCohesion"
#  [7] "Agriculture"                        "Education_Culture"
#  [9] "Environment_Energy"                 "EU_Europa"
# [11] "Finances_Taxes"                     "Immigration_Asylum"
# [13] "Law_Order"                          "Other_unclassified_Political_Texts"
# [15] "GenderIssues_Discrimination"        "PublicHealth"
# [17] "LabourMarket"                       "Other_Problems"
# [19] "Not Classified"                     "InternationalRelations"
# --------------------------------------------------------------------------- #
# Overall Linear Regression Models
# --------------------------------------------------------------------------- #
# Difference-in-differences models for the overall M-vs-F series.
# Every specification is fitted twice:
#   - on the full year: the summary is printed for comparison only and the fit
#     is NOT stored;
#   - on January-November (month 12 dropped; the reason is not documented in
#     this script): this fit is stored and is the one used in the tables.
# Storing only the January-November fit means a failed refit cannot leave a
# full-year model behind under the name the table code reads.
d_overall_i_jan_nov <- subset(d_overall_i, month %in% 1:11)

# OLS on the share (perc), month fixed effects.
summary(lm(perc ~ treated * post_treatment + as.factor(month),
           data = d_overall_i))

out_overall_i <- lm(perc ~ treated * post_treatment + as.factor(month),
                    data = d_overall_i_jan_nov)
summary(out_overall_i)

# OLS on the share (perc), month and weekday fixed effects.
summary(lm(perc ~ treated * post_treatment + as.factor(month) + as.factor(wday),
           data = d_overall_i))

out_overall_ib <- lm(perc ~ treated * post_treatment + as.factor(month) + as.factor(wday),
                     data = d_overall_i_jan_nov)
summary(out_overall_ib)

# --------------------------------------------------------------------------- #
# Overall Negative Binomial Regression Models
# --------------------------------------------------------------------------- #
# Negative binomial on the count (n), month fixed effects.
summary(glm.nb(n ~ treated * post_treatment + as.factor(month),
               data = d_overall_i))

out_overall_i_nb <- glm.nb(n ~ treated * post_treatment + as.factor(month),
                           data = d_overall_i_jan_nov)
summary(out_overall_i_nb)

# Negative binomial on the count (n), month and weekday fixed effects.
summary(glm.nb(n ~ treated * post_treatment + as.factor(month) + as.factor(wday),
               data = d_overall_i))

out_overall_ib_nb <- glm.nb(n ~ treated * post_treatment + as.factor(month) + as.factor(wday),
                            data = d_overall_i_jan_nov)
summary(out_overall_ib_nb)
# --------------------------------------------------------------------------- #
# Issues Linear Regression Models
# --------------------------------------------------------------------------- #
# Issue-level difference-in-differences models for the M-vs-F series.
#
# `selectsclass` labels accepted for each reported issue. The first entry is
# the short label this script has always filtered on; the second is the long
# label shown in the category listing in the data-transformation section.
# NOTE(review): it is not visible from this script which spelling the current
# CSV uses - accepting both keeps the filter from silently matching nothing.
issue_labels_i <- list(
  gender      = c("Gender", "GenderIssues_Discrimination"),
  eu          = c("Europe", "EU_Europa"),
  immigration = c("Immigration", "Immigration_Asylum"),
  environment = c("Environment", "Environment_Energy"),
  economy     = "Economy",
  health      = c("Public Health", "PublicHealth")
)

# One data subset per issue; stops with a clear message if an issue has no
# rows instead of failing later inside lm()/glm.nb().
issue_data_i <- lapply(issue_labels_i, function(issue_values) {
  rows <- d_issues_i[d_issues_i$selectsclass %in% issue_values, ]
  if (nrow(rows) == 0) {
    stop("No rows in d_issues_i with selectsclass in: ",
         paste(issue_values, collapse = ", "), call. = FALSE)
  }
  rows
})

# Same specification for every issue: treatment x post interaction with month
# and weekday fixed effects; OLS on the share, negative binomial on the count.
f_issue_ols_i <- perc ~ treated * post_treatment + as.factor(month) + as.factor(wday)
f_issue_nb_i <- n ~ treated * post_treatment + as.factor(month) + as.factor(wday)

out_gender_i <- lm(f_issue_ols_i, data = issue_data_i[["gender"]])
summary(out_gender_i)

out_eu_i <- lm(f_issue_ols_i, data = issue_data_i[["eu"]])
summary(out_eu_i)

out_immigration_i <- lm(f_issue_ols_i, data = issue_data_i[["immigration"]])
summary(out_immigration_i)

out_environment_i <- lm(f_issue_ols_i, data = issue_data_i[["environment"]])
summary(out_environment_i)

out_economy_i <- lm(f_issue_ols_i, data = issue_data_i[["economy"]])
summary(out_economy_i)

out_health_i <- lm(f_issue_ols_i, data = issue_data_i[["health"]])
summary(out_health_i)
# --------------------------------------------------------------------------- #
# Issues Negative Binomial Regression Models
# --------------------------------------------------------------------------- #
out_gender_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["gender"]])
summary(out_gender_i_nb)

out_eu_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["eu"]])
summary(out_eu_i_nb)

out_immigration_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["immigration"]])
summary(out_immigration_i_nb)

out_environment_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["environment"]])
summary(out_environment_i_nb)

out_economy_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["economy"]])
summary(out_economy_i_nb)

out_health_i_nb <- glm.nb(f_issue_nb_i, data = issue_data_i[["health"]])
summary(out_health_i_nb)
# --------------------------------------------------------------------------- #
# Overall Linear Regression Models  (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
# Variant with all articles (including no mentions) ####
# Difference-in-differences models for the overall M-vs-F-vs-no-mention series.
# Every specification is fitted twice:
#   - on the full year: the summary is printed for comparison only and the fit
#     is NOT stored;
#   - on January-November (month 12 dropped; the reason is not documented in
#     this script): this fit is stored and is the one used in the tables.
# Storing only the January-November fit means a failed refit cannot leave a
# full-year model behind under the name the table code reads.
d_overall_ii_jan_nov <- subset(d_overall_ii, month %in% 1:11)

# OLS on the share (perc), month fixed effects.
summary(lm(perc ~ treated * post_treatment + as.factor(month),
           data = d_overall_ii))

out_overall_ii <- lm(perc ~ treated * post_treatment + as.factor(month),
                     data = d_overall_ii_jan_nov)
summary(out_overall_ii)

# OLS on the share (perc), month and weekday fixed effects.
summary(lm(perc ~ treated * post_treatment + as.factor(month) + as.factor(wday),
           data = d_overall_ii))

out_overall_iib <- lm(perc ~ treated * post_treatment + as.factor(month) + as.factor(wday),
                      data = d_overall_ii_jan_nov)
summary(out_overall_iib)
# --------------------------------------------------------------------------- #
# Overall Negative Binomial Regression Models  (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
# Variant with all articles (including no mentions) ####
# Negative binomial on the count (n_2), month fixed effects.
summary(glm.nb(n_2 ~ treated * post_treatment + as.factor(month),
               data = d_overall_ii))

out_overall_ii_nb <- glm.nb(n_2 ~ treated * post_treatment + as.factor(month),
                            data = d_overall_ii_jan_nov)
summary(out_overall_ii_nb)

# Negative binomial on the count (n_2), month and weekday fixed effects.
summary(glm.nb(n_2 ~ treated * post_treatment + as.factor(month) + as.factor(wday),
               data = d_overall_ii))

out_overall_iib_nb <- glm.nb(n_2 ~ treated * post_treatment + as.factor(month) + as.factor(wday),
                             data = d_overall_ii_jan_nov)
summary(out_overall_iib_nb)
# --------------------------------------------------------------------------- #
# Issues Linear Regression Models  (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
# Issue-level difference-in-differences models for the M-vs-F-vs-no-mention
# series.
#
# `selectsclass` labels accepted for each reported issue. The first entry is
# the short label this script has always filtered on; the second is the long
# label shown in the category listing in the data-transformation section.
# NOTE(review): it is not visible from this script which spelling the current
# CSV uses - accepting both keeps the filter from silently matching nothing.
issue_labels_ii <- list(
  gender      = c("Gender", "GenderIssues_Discrimination"),
  eu          = c("Europe", "EU_Europa"),
  immigration = c("Immigration", "Immigration_Asylum"),
  environment = c("Environment", "Environment_Energy"),
  economy     = "Economy",
  health      = c("Public Health", "PublicHealth")
)

# One data subset per issue; stops with a clear message if an issue has no
# rows instead of failing later inside lm()/glm.nb().
issue_data_ii <- lapply(issue_labels_ii, function(issue_values) {
  rows <- d_issues_ii[d_issues_ii$selectsclass %in% issue_values, ]
  if (nrow(rows) == 0) {
    stop("No rows in d_issues_ii with selectsclass in: ",
         paste(issue_values, collapse = ", "), call. = FALSE)
  }
  rows
})

# Same specification for every issue: treatment x post interaction with month
# and weekday fixed effects; OLS on the share, negative binomial on the count.
f_issue_ols_ii <- perc ~ treated * post_treatment + as.factor(month) + as.factor(wday)
f_issue_nb_ii <- n_2 ~ treated * post_treatment + as.factor(month) + as.factor(wday)

out_gender_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["gender"]])
summary(out_gender_ii)

out_eu_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["eu"]])
summary(out_eu_ii)

out_immigration_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["immigration"]])
summary(out_immigration_ii)

out_environment_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["environment"]])
summary(out_environment_ii)

out_economy_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["economy"]])
summary(out_economy_ii)

out_health_ii <- lm(f_issue_ols_ii, data = issue_data_ii[["health"]])
summary(out_health_ii)
# --------------------------------------------------------------------------- #
# Issues Negative Binomial Regression Models  (M vs F vs NO Mention)
# --------------------------------------------------------------------------- #
out_gender_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["gender"]])
summary(out_gender_ii_nb)

out_eu_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["eu"]])
summary(out_eu_ii_nb)

out_immigration_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["immigration"]])
summary(out_immigration_ii_nb)

out_environment_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["environment"]])
summary(out_environment_ii_nb)

out_economy_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["economy"]])
summary(out_economy_ii_nb)

out_health_ii_nb <- glm.nb(f_issue_nb_ii, data = issue_data_ii[["health"]])
summary(out_health_ii_nb)
###############################################################################
# 4) Save Model Output in Tables
###############################################################################
# ----------------------------------------------------------------------------#
# Linear OLS Models
# ----------------------------------------------------------------------------#
## Table (HTML): all eight OLS models for the M-vs-F series, every coefficient
## shown.
htmlreg(
  list(out_overall_i, out_overall_ib, out_gender_i, out_environment_i,
       out_eu_i, out_immigration_i, out_economy_i, out_health_i),
  file = "../tables_appendix/diff-in-diff_mf_big_newspapers_only.html",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration", "Economy", "Health")
)

## Table (LaTeX): the first six models only (Economy and Health are not
## included). custom.coef.map restricts the printed rows to the treatment
## terms and the intercept; the fixed effects are reported through the
## check-mark rows instead. The first model has no weekday fixed effects,
## hence the empty first entry in "Weekdays FEs".
## The response of these models is `perc`, so the caption speaks of the share
## of articles, not their number.
texreg::texreg(
  list(out_overall_i, out_overall_ib, out_gender_i, out_environment_i,
       out_eu_i, out_immigration_i),
  file = "../tables_appendix/diff-in-diff_big_newspapers_only.tex",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration"),
  custom.gof.rows = list(
    "Month FEs" = c("\\checkmark", "\\checkmark", "\\checkmark",
                    "\\checkmark", "\\checkmark", "\\checkmark"),
    "Weekdays FEs" = c("", "\\checkmark", "\\checkmark",
                       "\\checkmark", "\\checkmark", "\\checkmark")
  ),
  custom.coef.map = list(
    "treated" = "Strike Year (2019)",
    "post_treatment" = "After women's strike",
    "treated:post_treatment" = "Strike Year (2019) * After Women Strike",
    "(Intercept)" = "(Intercept)"
  ),
  label = "table:diffindiff",
  float.pos = "h",
  caption = "Statistical ordinary least squares models of the share of articles mentioning at least one female candidate."
)

## Table (HTML): all eight OLS models for the M-vs-F-vs-no-mention series,
## every coefficient shown.
htmlreg(
  list(out_overall_ii, out_overall_iib, out_gender_ii, out_environment_ii,
       out_eu_ii, out_immigration_ii, out_economy_ii, out_health_ii),
  file = "../tables_appendix/diff-in-diff-mfNA__big_newspapers_only.html",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration", "Economy", "Health")
)

## Table (LaTeX): the first six models only (Economy and Health are not
## included). custom.coef.map restricts the printed rows to the treatment
## terms and the intercept; the fixed effects are reported through the
## check-mark rows instead. The first model has no weekday fixed effects,
## hence the empty first entry in "Weekdays FEs".
## The response of these models is `perc`, so the caption speaks of the share
## of articles, not their number.
texreg::texreg(
  list(out_overall_ii, out_overall_iib, out_gender_ii, out_environment_ii,
       out_eu_ii, out_immigration_ii),
  file = "../tables_appendix/app-diff-in-diff_mfNA_big_newspapers_only.tex",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration"),
  custom.gof.rows = list(
    "Month FEs" = c("\\checkmark", "\\checkmark", "\\checkmark",
                    "\\checkmark", "\\checkmark", "\\checkmark"),
    "Weekdays FEs" = c("", "\\checkmark", "\\checkmark",
                       "\\checkmark", "\\checkmark", "\\checkmark")
  ),
  custom.coef.map = list(
    "treated" = "Strike Year (2019)",
    "post_treatment" = "After women's strike",
    "treated:post_treatment" = "Strike Year (2019) * After Women Strike",
    "(Intercept)" = "(Intercept)"
  ),
  label = "table:appdiffindiff",
  float.pos = "h",
  caption = "Statistical OLS models of the share of articles mentioning at least one female candidate. Robustness check for Table \\ref{table:diffindiff} with share of all articles, rather than only those mentioning candidates."
)


# ----------------------------------------------------------------------------#
# Negative Binomial Models
# ----------------------------------------------------------------------------#
## Table (HTML): all eight negative binomial models for the M-vs-F series,
## every coefficient shown.
htmlreg(
  list(out_overall_i_nb, out_overall_ib_nb, out_gender_i_nb,
       out_environment_i_nb, out_eu_i_nb, out_immigration_i_nb,
       out_economy_i_nb, out_health_i_nb),
  file = "../tables_appendix/diff-in-diff_mf_big_newspapers_only_negbin.html",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration", "Economy", "Health")
)

## Table (LaTeX): the first six models only (Economy and Health are not
## included). custom.coef.map restricts the printed rows to the treatment
## terms and the intercept; the fixed effects are reported through the
## check-mark rows instead. The first model has no weekday fixed effects,
## hence the empty first entry in "Weekdays FEs".
## The response of these models is the count `n`, so the caption speaks of the
## number of articles, not their share.
## NOTE(review): the label "table:diffindiff" is also given to the OLS table
## written to diff-in-diff_big_newspapers_only.tex. If both files are included
## in the same LaTeX document the label is multiply defined - confirm which
## table \ref{table:diffindiff} is meant to resolve to.
texreg::texreg(
  list(out_overall_i_nb, out_overall_ib_nb, out_gender_i_nb,
       out_environment_i_nb, out_eu_i_nb, out_immigration_i_nb),
  file = "../tables_appendix/diff-in-diff_big_newspapers_only_negbin.tex",
  custom.model.names = c("Overall", "Overall", "Gender", "Environment",
                         "Europe", "Immigration"),
  custom.gof.rows = list(
    "Month FEs" = c("\\checkmark", "\\checkmark", "\\checkmark",
                    "\\checkmark", "\\checkmark", "\\checkmark"),
    "Weekdays FEs" = c("", "\\checkmark", "\\checkmark",
                       "\\checkmark", "\\checkmark", "\\checkmark")
  ),
  custom.coef.map = list(
    "treated" = "Strike Year (2019)",
    "post_treatment" = "After women's strike",
    "treated:post_treatment" = "Strike Year (2019) * After Women Strike",
    "(Intercept)" = "(Intercept)"
  ),
  label = "table:diffindiff",
  float.pos = "h",
  caption = "Statistical negative binomial models of the number of articles mentioning at least one female candidate."
)

## Table: negative binomial models for the M-vs-F-vs-no-mention series.
## The HTML file gets all eight models with every coefficient; the LaTeX file
## gets the first six (Economy and Health are not included), showing only the
## treatment terms and the intercept, with the fixed effects indicated by
## check-mark rows. The first model has no weekday fixed effects, hence the
## empty first entry in "Weekdays FEs".
nb_models_ii <- list(
  out_overall_ii_nb, out_overall_iib_nb, out_gender_ii_nb,
  out_environment_ii_nb, out_eu_ii_nb, out_immigration_ii_nb,
  out_economy_ii_nb, out_health_ii_nb
)
nb_model_names_ii <- c("Overall", "Overall", "Gender", "Environment",
                       "Europe", "Immigration", "Economy", "Health")

htmlreg(
  nb_models_ii,
  file = "../tables_appendix/diff-in-diff-mfNA_big_newspapers_only_negbin.html",
  custom.model.names = nb_model_names_ii
)

nb_tex_columns_ii <- seq_len(6)
texreg::texreg(
  nb_models_ii[nb_tex_columns_ii],
  file = "../tables_appendix/app-diff-in-diff_mfNA_big_newspapers_only_negbin.tex",
  custom.model.names = nb_model_names_ii[nb_tex_columns_ii],
  custom.gof.rows = list(
    "Month FEs" = rep("\\checkmark", 6),
    "Weekdays FEs" = c("", rep("\\checkmark", 5))
  ),
  custom.coef.map = list(
    "treated" = "Strike Year (2019)",
    "post_treatment" = "After women's strike",
    "treated:post_treatment" = "Strike Year (2019) * After Women Strike",
    "(Intercept)" = "(Intercept)"
  ),
  label = "table:appdiffindiff",
  float.pos = "h",
  caption = "Statistical negative binomial models of the number of articles mentioning at least one female candidate. Robustness check for Table \\ref{table:diffindiff} with share of all articles, rather than only those mentioning candidates."
)



